Imports
library("mvtnorm")
2.1. Implementing GP Regression.
This first exercise will have you writing your own code for the Gaussian process regression model: y = f(x) + ε, where ε ∼ N(0, σ_n²) and f ∼ GP(0, k(x, x′)).
# Squared exponential (RBF) covariance function.
# Returns the length(x1) x length(x2) matrix K with
#   K[i, j] = sigmaF^2 * exp(-0.5 * ((x1[i] - x2[j]) / l)^2)
#
# x1, x2 : numeric vectors of input locations
# sigmaF : signal standard deviation (vertical variation)
# l      : length scale (horizontal smoothness)
SquaredExpKernel <- function(x1, x2, sigmaF = 1, l = 3) {
  scaledDiff <- outer(x1, x2, "-") / l
  sigmaF^2 * exp(-0.5 * scaledDiff^2)
}
# Gaussian process posterior via the Cholesky decomposition
# (Rasmussen & Williams, Algorithm 2.1).
#
# X          : training inputs (numeric vector)
# y          : training targets (numeric vector)
# XStar      : test inputs at which the posterior is evaluated
# sigmaNoise : observation noise standard deviation sigma_n
# k          : covariance function of the form k(x1, x2, ...)
# ...        : hyperparameters forwarded to k
#
# Returns a list with the posterior mean of f at XStar (`mean`, an
# n* x 1 matrix) and the full posterior covariance matrix (`variance`).
#
# Fix: kStar was previously computed twice (once before and once after
# the Cholesky step); the redundant recomputation is removed.
posteriorGP <- function(X, y, XStar, sigmaNoise, k, ...){
n <- length(X)
K <- k(X, X, ...)
kStar <- k(X,XStar, ...)
# Lower-triangular Cholesky factor of (K + sigma_n^2 I);
# chol() returns the upper factor, hence the transpose.
L <- t(chol(K + sigmaNoise^2*diag(n)))
# alpha = (K + sigma_n^2 I)^{-1} y via two triangular solves
alpha <- solve(t(L),solve(L,y))
# Posterior mean = kStar' alpha
fStar <- t(kStar)%*%alpha
# Posterior covariance: k(XStar, XStar) - v'v with v = L^{-1} kStar
v <- solve(L,kStar)
variance <- k(XStar, XStar, ...) - crossprod(v)
return(list(mean =fStar, variance =variance))
}
#Hyperparameters
# sigmaF: signal standard deviation, l: length scale of the SE kernel,
# sigmaNoise: observation noise standard deviation sigma_n.
sigmaF <- 1
l <- 0.3
sigmaNoise <- 0.1
#observation
# Single training observation (x, y) = (0.4, 0.719).
obs <- data.frame(X = 0.4, Y = 0.719)
#test points
# Grid over [-1, 1] at which the posterior of f is evaluated.
XStar <- seq(-1,1,length=100)
posterior <- posteriorGP(obs$X, obs$Y, XStar, sigmaNoise, SquaredExpKernel, sigmaF, l)
# mean and posterior
# Pointwise posterior variance is the diagonal of the covariance matrix.
posterior_mean <- posterior$mean
posterior_variance <- diag(posterior$variance)
# 95% confidence intervals
upper_bound <- posterior_mean + 1.96 * sqrt(posterior_variance)
lower_bound <- posterior_mean - 1.96 * sqrt(posterior_variance)
# Plot posterior mean and 95% confidence intervals
plot(XStar, posterior_mean, type = "l", col = "blue", lwd = 2,
ylim = c(min(lower_bound), max(upper_bound)),
xlab = "x", ylab = "f(x)", main = "Posterior Mean and 95% Confidence Bands of Single Observation")
lines(XStar, upper_bound, col = "red", lty = 2)
lines(XStar, lower_bound, col = "red", lty = 2)
points(obs$X, obs$Y, col = "black", pch = 19) # Plot the training point
(3) Update your posterior from (2) with another observation: (x,y) = (−0.6,−0.044). Plot the posterior mean of f over the interval x ∈ [−1, 1]. Plot also 95 % probability (point-wise) bands for f.
Hint: Updating the posterior after one observation with a new observation gives the same result as updating the prior directly with the two observations.
# (3) Add the observation (-0.6, -0.044) and recompute the posterior
# from the prior with both observations at once (equivalent to a
# sequential update, per the hint above).
obs2 <- rbind(obs, data.frame(X= -0.6, Y = -0.044))
posterior <- posteriorGP(obs2$X, obs2$Y, XStar, sigmaNoise, SquaredExpKernel, sigmaF, l)
# mean and posterior
posterior_mean <- posterior$mean
posterior_variance <- diag(posterior$variance)
# 95% confidence intervals
upper_bound <- posterior_mean + 1.96 * sqrt(posterior_variance)
lower_bound <- posterior_mean - 1.96 * sqrt(posterior_variance)
# Plot posterior mean and 95% confidence intervals
plot(XStar, posterior_mean, type = "l", col = "blue", lwd = 2,
ylim = c(min(lower_bound), max(upper_bound)),
xlab = "x", ylab = "f(x)", main = "Posterior Mean and 95% Confidence Bands of Two Observations")
lines(XStar, upper_bound, col = "red", lty = 2)
lines(XStar, lower_bound, col = "red", lty = 2)
points(obs2$X, obs2$Y, col = "black", pch = 19) # Plot the training points
# (4) Posterior with all five observations, same hyperparameters
# (sigmaF = 1, l = 0.3).
obs5 <- data.frame(X = c(-1,-0.6,-0.2,0.4,0.8),
Y = c(0.768,-0.044,-0.940,0.719,-0.664))
posterior <- posteriorGP(obs5$X, obs5$Y, XStar, sigmaNoise, SquaredExpKernel, sigmaF, l)
# mean and posterior
posterior_mean <- posterior$mean
posterior_variance <- diag(posterior$variance)
# 95% confidence intervals
upper_bound <- posterior_mean + 1.96 * sqrt(posterior_variance)
lower_bound <- posterior_mean - 1.96 * sqrt(posterior_variance)
# Plot posterior mean and 95% confidence intervals
plot(XStar, posterior_mean, type = "l", col = "blue", lwd = 2,
ylim = c(min(lower_bound), max(upper_bound)),
xlab = "x", ylab = "f(x)", main = "Posterior Mean and 95% Confidence Bands of Five Observations")
lines(XStar, upper_bound, col = "red", lty = 2)
lines(XStar, lower_bound, col = "red", lty = 2)
points(obs5$X, obs5$Y, col = "black", pch = 19) # Plot the training points
(5) Repeat (4), this time with hyperparameters σf = 1 and l = 1. Compare the results.
# (5) Same five observations as (4), but with sigmaF = 1 and l = 1; the
# larger length scale gives a smoother posterior mean.
posterior <- posteriorGP(obs5$X, obs5$Y, XStar, sigmaNoise, SquaredExpKernel, sigmaF = 1, l = 1)
# mean and posterior
posterior_mean <- posterior$mean
posterior_variance <- diag(posterior$variance)
# 95% confidence intervals
upper_bound <- posterior_mean + 1.96 * sqrt(posterior_variance)
lower_bound <- posterior_mean - 1.96 * sqrt(posterior_variance)
# Plot posterior mean and 95% confidence intervals.
# Fix: the title previously said "Two Observations" although this plot
# uses the five observations in obs5.
plot(XStar, posterior_mean, type = "l", col = "blue", lwd = 2,
ylim = c(min(lower_bound), max(upper_bound)),
xlab = "x", ylab = "f(x)", main = "Posterior Mean and 95% Confidence Bands of Five Observations (l = 1)")
lines(XStar, upper_bound, col = "red", lty = 2)
lines(XStar, lower_bound, col = "red", lty = 2)
points(obs5$X, obs5$Y, col = "black", pch = 19) # Plot the training points
Imports and data structure
library(kernlab)
# Daily temperature data (Tullinge). Add `time` (index over the whole
# series) and `day` (position within a 365-day year), then thin the
# training set to every fifth observation.
tempData <- read.csv("https://github.com/STIMALiU/AdvMLCourse/raw/master/GaussianProcess/Code/TempTullinge.csv", header=TRUE, sep=";")
tempData <- cbind(tempData, time = 1:nrow(tempData))
tempData <- cbind(tempData, day = ((tempData$time-1)%%365)+1)
trainData <- subset(tempData, (time - 1)%%5 == 0)
#nested Square Exponetial Kernel
nestedSEK <- function(sigmaF=1,l=3) {
fixedSEK <- function(x1,x2){
n1 <- length(x1)
n2 <- length(x2)
K <- matrix(NA,n1,n2)
for (i in 1:n2){
K[,i] <- sigmaF^2*exp(-0.5*( (x1-x2[i])/l)^2 )
}
return(K)
}
class(fixedSEK) <- 'kernel'
return(fixedSEK)
}
# Instantiate the kernel with default hyperparameters (sigmaF = 1, l = 3).
SEK <- nestedSEK()
#testing kernel function for x=1, xstar=2
SEK(1,2)
## [,1]
## [1,] 0.9459595
# kernel matrix where x = X, y = Xstar
kernelMatrix(kernel = SEK, x = c(1,3,4), y =c(2,3,4))
## An object of class "kernelMatrix"
## [,1] [,2] [,3]
## [1,] 0.9459595 0.8007374 0.6065307
## [2,] 0.9459595 1.0000000 0.9459595
## [3,] 0.8007374 0.9459595 1.0000000
#Estimating sigmaNoise from fitting a two degree polynomial to data
# The residual standard deviation of a quadratic fit in time is used as
# a plug-in estimate of the observation noise sigma_n.
polyFit <- lm(trainData$temp ~ trainData$time + I(trainData$time^2))
sigmaNoise <- sd(polyFit$residuals)
#setting hyperparameters in kernel function
SEK <- nestedSEK(sigmaF = 20, l = 100)
# GP regression on time; gausspr's `var` takes the noise VARIANCE,
# hence sigmaNoise^2.
modelGP <- gausspr(trainData$time, trainData$temp, scaled = FALSE, kernel = SEK, var = sigmaNoise^2)
# Posterior mean at the training inputs (kept for later comparison plots).
posteriorMeanTime <- predict(modelGP)
plot(x= trainData$time, y = trainData$temp,
xlab = "time", ylab = "temp", main = "Temperature predictions", lwd = 1.5)
lines(x=trainData$time, y = posteriorMeanTime, col = "red", lwd = 3)
legend("bottomright", legend=c("Data", "Predictions"), pch=c(1, NA), lty=c(NA, 1), lwd=c(NA, 2), col=c("black", "red"))
#setting hyperparameters in kernel function
SEK <- nestedSEK(sigmaF = 20, l = 100)
#modelGP <- gausspr(trainData$time, trainData$temp, scaled = FALSE, kernel = SEK, var = sigmaNoise^2)
# Same model via our own posteriorGP, evaluated at the training inputs,
# to obtain the posterior covariance and 95% probability bands for f.
posterior <- posteriorGP(trainData$time, trainData$temp, trainData$time, sigmaNoise, SEK)
posteriorMean <- posterior$mean
posteriorVariance <- diag(posterior$variance)
# 95% confidence intervals
upper <- posteriorMean + 1.96 * sqrt(posteriorVariance)
lower <- posteriorMean - 1.96 * sqrt(posteriorVariance)
plot(x= trainData$time, y = trainData$temp,
xlab = "time", ylab = "temp", main = "Temperature predictions", lwd = 1.5)
lines(x=trainData$time, y = posteriorMean, col = "red", lwd = 3)
lines(trainData$time, upper, col = "blue", lwd = 1)
lines(trainData$time, lower, col = "blue", lwd = 1)
legend("bottomright", legend=c("Data", "Predictions", "Confidence Interval"), pch=c(1, NA, NA), lty=c(NA, 1, 1), lwd=c(NA, 2, 2), col=c("black", "red", "blue"))
# GP regression on day-of-year instead of absolute time; the fit is
# plotted against time to compare with the time-based model above.
SEK <- nestedSEK(sigmaF = 20, l = 100)
modelGP <- gausspr(trainData$day, trainData$temp, scaled = FALSE, kernel = SEK, var = sigmaNoise^2)
posteriorMeanDay <- predict(modelGP)
plot(x= trainData$time, y = trainData$temp,
xlab = "time", ylab = "temp", main = "Temperature predictions", lwd = 1.5)
lines(x=trainData$time, y = posteriorMeanTime, col = "red", lwd = 2)
lines(x=trainData$time, y = posteriorMeanDay, col = "blue", lwd = 2)
legend("bottomright", legend=c("Data", "Prediction Time", "Prediction Day"), pch=c(1, NA, NA), lty=c(NA, 1, 1), lwd=c(NA, 2, 2), col=c("black", "red", "blue"))
# Locally periodic kernel: a periodic term (period d, length scale l1)
# damped by a squared exponential envelope (length scale l2), so the
# periodic pattern may change slowly over time:
#   k(x, x') = sigmaF^2 * exp(-2 sin^2(pi |x - x'| / d) / l1^2)
#                       * exp(-|x - x'|^2 / (2 l2^2))
# Fix: the damping term previously divided by l2 instead of l2^2, which
# does not match the kernel definition above and makes the effective
# long-term length scale sqrt(l2).
periodicKernel <- function(x, xstar, sigmaF = 20, l1 = 1, l2 = 100, d = 365){
absDiff <- abs(x-xstar)
return(sigmaF^2*exp(-2*sin(pi*absDiff/d)^2/l1^2)*exp(-1/2*absDiff^2/l2^2))
}
class(periodicKernel) <- 'kernel'
# GP regression on time with the locally periodic kernel; compared with
# the pure SE fits on time and on day-of-year.
modelGP <- gausspr(trainData$time, trainData$temp, scaled = FALSE, kernel = periodicKernel, var = sigmaNoise^2)
posteriorMeanPeriodic <- predict(modelGP)
plot(x= trainData$time, y = trainData$temp,
xlab = "time", ylab = "temp", main = "Temperature predictions", lwd = 1.5)
lines(x=trainData$time, y = posteriorMeanPeriodic, col = "green", lwd = 2)
lines(x=trainData$time, y = posteriorMeanTime, col = "red", lwd = 2)
lines(x=trainData$time, y = posteriorMeanDay, col = "blue", lwd = 2)
legend("bottomright", legend=c("Data", "Prediction Time", "Prediction Day", "Prediction Periodic"), pch=c(1, NA, NA, NA), lty=c(NA, 1, 1, 1), lwd=c(NA, 2, 2, 2), col=c("black", "red", "blue", "green"))
Import & Data
#library(AtmRay)
#install.packages("https://cran.r-project.org/src/contrib/Archive/AtmRay/AtmRay_1.31.tar.gz", repos = NULL, type = "source")
library(kernlab)
library(AtmRay)
# Banknote fraud data: four wavelet-based features plus a binary fraud
# label (column 5).
data <- read.csv("https://github.com/STIMALiU/AdvMLCourse/raw/master/GaussianProcess/Code/banknoteFraud.csv", header=FALSE, sep=",")
names(data) <- c("varWave","skewWave","kurtWave","entropyWave","fraud")
# Response as factor so gausspr performs classification.
data[,5] <- as.factor(data[,5])
set.seed(111)
# 1000 random rows for training, remainder held out for testing.
SelectTraining <- sample(1:dim(data)[1], size = 1000, replace = FALSE)
trainData <- data[SelectTraining,]
testData <- data[-SelectTraining,]
# Classification accuracy: the proportion of elements where the
# predicted label equals the true label.
accuracy <- function(true, pred){
  matches <- true == pred
  sum(matches) / length(matches)
}
# GP classification using only the first two covariates.
GPfitFraud <- gausspr(fraud ~ varWave + skewWave, data=trainData)
## Using automatic sigma estimation (sigest) for RBF or laplace kernel
# In-sample predictions, confusion matrix and training accuracy.
predictionTrain <- predict(GPfitFraud)
table(trainData$fraud, predictionTrain)
## predictionTrain
## 0 1
## 0 503 41
## 1 18 438
accuracy(trainData$fraud, predictionTrain)
## [1] 0.941
# Class probabilities over a 100 x 100 grid spanning the two training
# features, used to draw the predicted probability landscape.
probPreds <- predict(GPfitFraud, type="probabilities")
x1 <- seq(min(trainData$varWave),max(trainData$varWave),length=100)
x2 <- seq(min(trainData$skewWave),max(trainData$skewWave),length=100)
gridPoints <- meshgrid(x1, x2)
gridPoints <- cbind(c(gridPoints$x), c(gridPoints$y))
gridPoints <- data.frame(gridPoints)
names(gridPoints) <- names(trainData)[1:2]
probPreds <- predict(GPfitFraud, gridPoints, type="probabilities")
# Plotting Prob(fraud). Fixes: the comment was left over from the iris
# example ("setosa"); the x axis was mislabelled "varSkew" although x1
# spans varWave; and column 1 of probPreds is P(fraud = 0) (columns
# follow the factor level order 0, 1), so the title "Prob(fraud)"
# requires column 2.
contour(x1,x2,matrix(probPreds[,2],100,byrow = TRUE), 20, xlab = "varWave", ylab = "skewWave", main = 'Prob(fraud), fraud is red')
points(trainData[trainData$fraud==1,1],trainData[trainData$fraud==1,2],col="red")
points(trainData[trainData$fraud==0,1],trainData[trainData$fraud==0,2],col="blue")
# Test-set accuracy of the two-covariate classifier.
predictionTest <- predict(GPfitFraud, testData[,c(1,2)])
accuracy(testData$fraud, predictionTest)
## [1] 0.9247312
# GP classifier using all four covariates; accuracy improves markedly.
GPfitFraud4 <- gausspr(fraud ~ ., data=trainData)
## Using automatic sigma estimation (sigest) for RBF or laplace kernel
prediction4 <- predict(GPfitFraud4, testData[,-5])
accuracy(testData$fraud, prediction4)
## [1] 0.9946237